{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3",
   "language": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "# 用test_format1训练模型并保存"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyspark\n",
    "from pyspark import SparkContext\n",
    "from pyspark import SparkConf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Spark configuration as one parenthesised chain, then obtain the\n",
    "# SparkContext for it (getOrCreate reuses a context if one is already running).\n",
    "# NOTE(review): the master is \"local\", so the two executor settings below may\n",
    "# have no effect -- confirm against the Spark configuration docs.\n",
    "conf = (\n",
    "    SparkConf()\n",
    "    .setAppName(\"miniProject\")\n",
    "    .setMaster(\"local\")\n",
    "    .set(\"spark.executor.memory\", \"3g\")\n",
    "    .set(\"spark.executor.instances\", \"2\")\n",
    ")\n",
    "sc = SparkContext.getOrCreate(conf)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.util import MLUtils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "PythonRDD[4] at RDD at PythonRDD.scala:53"
      ]
     },
     "metadata": {},
     "execution_count": 4
    }
   ],
   "source": [
    "# Read the LibSVM-formatted training file from HDFS into an RDD.\n",
    "train_path = \"hdfs://node1:9000/user/root/exp4/procd_train_real\"\n",
    "training = MLUtils.loadLibSVMFile(sc, train_path)\n",
    "\n",
    "# No train/test split is made here: every model below is fitted and scored on\n",
    "# this same RDD, so the figures printed later are training errors.\n",
    "# cache() is called because the RDD is reused by every later cell; it stays the\n",
    "# last expression so the cell still displays the RDD.\n",
    "training.cache()\n"
   ]
  },
  {
   "source": [
    "## Logistic Regression"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionModel\n",
    "\n",
    "# Fit LogisticRegressionWithLBFGS on the training RDD. Only the data is passed,\n",
    "# so every other argument keeps its library default.\n",
    "model = LogisticRegressionWithLBFGS.train(data=training)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Training Error = 0.06215499263984298\n"
     ]
    }
   ],
   "source": [
    "# Training-set error: pair each label with the model's prediction for the same\n",
    "# point, then take the fraction of pairs that disagree.\n",
    "labelsAndPreds = training.map(lambda point: (point.label, model.predict(point.features)))\n",
    "n_wrong = labelsAndPreds.filter(lambda pair: pair[0] != pair[1]).count()\n",
    "n_total = training.count()\n",
    "trainErr = float(n_wrong) / n_total\n",
    "print(\"Training Error = \" + str(trainErr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model to HDFS; the commented line shows how to read it back.\n",
    "# NOTE(review): re-running may fail if this path already exists -- confirm.\n",
    "lr_model_path = \"hdfs://node1:9000/user/root/exp4/models/LogisticRegressionModel\"\n",
    "model.save(sc, lr_model_path)\n",
    "# sameModel = LogisticRegressionModel.load(sc, lr_model_path)"
   ]
  },
  {
   "source": [
    "## SVM"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.classification import SVMWithSGD, SVMModel\n",
    "\n",
    "# Fit SVMWithSGD on the training RDD with 200 iterations; the remaining\n",
    "# arguments keep their library defaults.\n",
    "svm_iterations = 200\n",
    "model = SVMWithSGD.train(data=training, iterations=svm_iterations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Training Error = 0.08762036923454367\n"
     ]
    }
   ],
   "source": [
    "# Training-set error: pair each label with the model's prediction for the same\n",
    "# point, then take the fraction of pairs that disagree.\n",
    "labelsAndPreds = training.map(lambda point: (point.label, model.predict(point.features)))\n",
    "n_wrong = labelsAndPreds.filter(lambda pair: pair[0] != pair[1]).count()\n",
    "n_total = training.count()\n",
    "trainErr = float(n_wrong) / n_total\n",
    "print(\"Training Error = \" + str(trainErr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model to HDFS; the commented line shows how to read it back.\n",
    "# NOTE(review): re-running may fail if this path already exists -- confirm.\n",
    "svm_model_path = \"hdfs://node1:9000/user/root/exp4/models/SVMWithSGDModel\"\n",
    "model.save(sc, svm_model_path)\n",
    "# sameModel = SVMModel.load(sc, svm_model_path)"
   ]
  },
  {
   "source": [
    "## Decision Tree"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.tree import DecisionTree, DecisionTreeModel\n",
    "\n",
    "# Settings for a two-class decision-tree classifier. The empty\n",
    "# categoricalFeaturesInfo dict indicates that all features are continuous.\n",
    "tree_params = dict(\n",
    "    numClasses=2,\n",
    "    categoricalFeaturesInfo={},\n",
    "    impurity='gini',\n",
    "    maxDepth=5,\n",
    "    maxBins=32,\n",
    ")\n",
    "model = DecisionTree.trainClassifier(training, **tree_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Training Error = 0.06113913763493621\n"
     ]
    }
   ],
   "source": [
    "# Training-set error for the tree model: predict on the whole feature RDD at\n",
    "# once, zip the labels with the predictions, and count the disagreements.\n",
    "features = training.map(lambda point: point.features)\n",
    "predictions = model.predict(features)\n",
    "labels = training.map(lambda point: point.label)\n",
    "labelsAndPreds = labels.zip(predictions)\n",
    "n_wrong = labelsAndPreds.filter(lambda pair: pair[0] != pair[1]).count()\n",
    "n_total = training.count()\n",
    "trainErr = float(n_wrong) / n_total\n",
    "print(\"Training Error = \" + str(trainErr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Learned classification tree model:\nDecisionTreeModel classifier of depth 5 with 25 nodes\n  If (feature 3 <= 8.5)\n   If (feature 8 <= 1.5)\n    If (feature 3 <= 2.5)\n     If (feature 0 <= 3.5)\n      Predict: 0.0\n     Else (feature 0 > 3.5)\n      If (feature 4 <= 3.5)\n       Predict: 0.0\n      Else (feature 4 > 3.5)\n       Predict: 1.0\n    Else (feature 3 > 2.5)\n     Predict: 0.0\n   Else (feature 8 > 1.5)\n    Predict: 0.0\n  Else (feature 3 > 8.5)\n   If (feature 3 <= 19.5)\n    If (feature 8 <= 1.5)\n     Predict: 0.0\n    Else (feature 8 > 1.5)\n     If (feature 5 <= 4.5)\n      Predict: 0.0\n     Else (feature 5 > 4.5)\n      If (feature 2 <= 12.5)\n       Predict: 1.0\n      Else (feature 2 > 12.5)\n       Predict: 0.0\n   Else (feature 3 > 19.5)\n    If (feature 5 <= 7.5)\n     Predict: 0.0\n    Else (feature 5 > 7.5)\n     If (feature 3 <= 65.0)\n      Predict: 0.0\n     Else (feature 3 > 65.0)\n      If (feature 8 <= 4.5)\n       Predict: 0.0\n      Else (feature 8 > 4.5)\n       Predict: 1.0\n\n"
     ]
    }
   ],
   "source": [
    "# Print a header line followed by the model's textual description.\n",
    "header = 'Learned classification tree model:'\n",
    "print(header)\n",
    "tree_description = model.toDebugString()\n",
    "print(tree_description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model to HDFS; the commented line shows how to read it back.\n",
    "# NOTE(review): re-running may fail if this path already exists -- confirm.\n",
    "dt_model_path = \"hdfs://node1:9000/user/root/exp4/models/myDecisionTreeClassificationModel\"\n",
    "model.save(sc, dt_model_path)\n",
    "# sameModel = DecisionTreeModel.load(sc, dt_model_path)"
   ]
  },
  {
   "source": [
    "## Naive Bayes"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel\n",
    "\n",
    "# Fit a naive Bayes model on the training RDD, passing 1.0 for lambda_\n",
    "# (the smoothing parameter according to the MLlib API docs).\n",
    "model = NaiveBayes.train(training, lambda_=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Training Error = 0.10636576913640824\n"
     ]
    }
   ],
   "source": [
    "# Training-set error: pair each label with the model's prediction for the same\n",
    "# point, then take the fraction of pairs that disagree.\n",
    "labelsAndPreds = training.map(lambda point: (point.label, model.predict(point.features)))\n",
    "n_wrong = labelsAndPreds.filter(lambda pair: pair[0] != pair[1]).count()\n",
    "n_total = training.count()\n",
    "trainErr = float(n_wrong) / n_total\n",
    "print(\"Training Error = \" + str(trainErr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model to HDFS; the commented line shows how to read it back.\n",
    "# NOTE(review): re-running may fail if this path already exists -- confirm.\n",
    "nb_model_path = \"hdfs://node1:9000/user/root/exp4/models/NaiveBayesModel\"\n",
    "model.save(sc, nb_model_path)\n",
    "# sameModel = NaiveBayesModel.load(sc, nb_model_path)"
   ]
  },
  {
   "source": [
    "## Random Forest"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.tree import RandomForest, RandomForestModel\n",
    "\n",
    "# Settings for a two-class random-forest classifier.\n",
    "#  - Empty categoricalFeaturesInfo indicates all features are continuous.\n",
    "#  - numTrees=3 is small; use a larger value in practice.\n",
    "#  - featureSubsetStrategy=\"auto\" lets the algorithm choose.\n",
    "# NOTE(review): no seed is passed, so the forest may differ between runs --\n",
    "# confirm against the RandomForest API docs and consider fixing a seed.\n",
    "forest_params = dict(\n",
    "    numClasses=2,\n",
    "    categoricalFeaturesInfo={},\n",
    "    numTrees=3,\n",
    "    featureSubsetStrategy=\"auto\",\n",
    "    impurity='gini',\n",
    "    maxDepth=4,\n",
    "    maxBins=32,\n",
    ")\n",
    "model = RandomForest.trainClassifier(training, **forest_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Training Error = 0.06115063788027478\n"
     ]
    }
   ],
   "source": [
    "# Training-set error for the forest: predict on the whole feature RDD at once,\n",
    "# zip the labels with the predictions, and count the disagreements.\n",
    "features = training.map(lambda point: point.features)\n",
    "predictions = model.predict(features)\n",
    "labels = training.map(lambda point: point.label)\n",
    "labelsAndPreds = labels.zip(predictions)\n",
    "n_wrong = labelsAndPreds.filter(lambda pair: pair[0] != pair[1]).count()\n",
    "n_total = training.count()\n",
    "trainErr = float(n_wrong) / n_total\n",
    "print(\"Training Error = \" + str(trainErr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Learned classification forest model:\nTreeEnsembleModel classifier with 3 trees\n\n  Tree 0:\n    Predict: 0.0\n  Tree 1:\n    Predict: 0.0\n  Tree 2:\n    If (feature 4 <= 2.5)\n     Predict: 0.0\n    Else (feature 4 > 2.5)\n     If (feature 3 <= 19.5)\n      If (feature 3 <= 8.5)\n       Predict: 0.0\n      Else (feature 3 > 8.5)\n       If (feature 5 <= 18.5)\n        Predict: 0.0\n       Else (feature 5 > 18.5)\n        Predict: 1.0\n     Else (feature 3 > 19.5)\n      Predict: 0.0\n\n"
     ]
    }
   ],
   "source": [
    "# Print a header line followed by the model's textual description.\n",
    "header = 'Learned classification forest model:'\n",
    "print(header)\n",
    "forest_description = model.toDebugString()\n",
    "print(forest_description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model to HDFS; the commented line shows how to read it back.\n",
    "# NOTE(review): re-running may fail if this path already exists -- confirm.\n",
    "rf_model_path = \"hdfs://node1:9000/user/root/exp4/models/myRandomForestClassificationModel\"\n",
    "model.save(sc, rf_model_path)\n",
    "# sameModel = RandomForestModel.load(sc, rf_model_path)"
   ]
  },
  {
   "source": [
    "## Gradient Boosted Trees"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.mllib.tree import GradientBoostedTrees, GradientBoostedTreesModel\n",
    "\n",
    "# Settings for a gradient-boosted-trees classifier.\n",
    "#  - Empty categoricalFeaturesInfo indicates all features are continuous.\n",
    "#  - numIterations=30; use more iterations in practice.\n",
    "gbt_params = dict(\n",
    "    categoricalFeaturesInfo={},\n",
    "    numIterations=30,\n",
    ")\n",
    "model = GradientBoostedTrees.trainClassifier(training, **gbt_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Training Error = 0.061142971050049066\n"
     ]
    }
   ],
   "source": [
    "# Training-set error for the boosted ensemble: predict on the whole feature RDD\n",
    "# at once, zip the labels with the predictions, and count the disagreements.\n",
    "features = training.map(lambda point: point.features)\n",
    "predictions = model.predict(features)\n",
    "labels = training.map(lambda point: point.label)\n",
    "labelsAndPreds = labels.zip(predictions)\n",
    "n_wrong = labelsAndPreds.filter(lambda pair: pair[0] != pair[1]).count()\n",
    "n_total = training.count()\n",
    "trainErr = float(n_wrong) / n_total\n",
    "print(\"Training Error = \" + str(trainErr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 10.5)\n       Predict: -0.32301894307771395\n      Else (feature 6 > 10.5)\n       Predict: 0.16717204043018258\n    Else (feature 4 > 2.5)\n     If (feature 4 <= 7.5)\n      If (feature 5 <= 3.5)\n       Predict: -0.2941068285134676\n      Else (feature 5 > 3.5)\n       Predict: -0.20809792196806162\n     Else (feature 4 > 7.5)\n      If (feature 5 <= 26.5)\n       Predict: -0.11963992385736985\n      Else (feature 5 > 26.5)\n       Predict: 1.1170996374172817\n  Tree 2:\n    If (feature 4 <= 2.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.35234887156188743\n      Else (feature 0 > 3.5)\n       Predict: -0.3227571761025556\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 10.5)\n       Predict: -0.2890697339853412\n      Else (feature 6 > 10.5)\n       Predict: 0.1467771630807103\n    Else (feature 4 > 2.5)\n     If (feature 8 <= 3.5)\n      If (feature 5 <= 3.5)\n       Predict: -0.27113921230756444\n      Else (feature 5 > 3.5)\n       Predict: -0.17997254914139127\n     Else (feature 8 > 3.5)\n      If (feature 3 <= 31.5)\n       Predict: -0.12000221567793165\n      Else (feature 3 > 31.5)\n       Predict: 0.20262702319720824\n  Tree 3:\n    If (feature 4 <= 3.5)\n     If (feature 8 <= 1.5)\n      If (feature 3 <= 1.5)\n       Predict: -0.3220711727323084\n      Else (feature 3 > 1.5)\n       Predict: -0.2920517432614221\n     Else (feature 8 > 1.5)\n      If (feature 3 <= 3.5)\n       Predict: -0.27023012865498003\n      Else (feature 3 > 3.5)\n       Predict: -0.21971371374855878\n    Else (feature 4 > 3.5)\n     If (feature 6 <= 8.5)\n      If (feature 4 <= 9.5)\n       Predict: -0.2024140105804298\n      Else (feature 4 > 9.5)\n       Predict: -0.05081123985958545\n     Else (feature 6 > 8.5)\n      If (feature 4 <= 17.5)\n       Predict: 0.012824532746686327\n      Else (feature 4 > 17.5)\n       Predict: 0.6314430055282801\n  Tree 4:\n    If (feature 4 <= 
2.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.2947276279827312\n      Else (feature 0 > 3.5)\n       Predict: -0.2672771963934841\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 2.5)\n       Predict: -0.24929747073919478\n      Else (feature 6 > 2.5)\n       Predict: -0.20638846794888285\n    Else (feature 4 > 2.5)\n     If (feature 8 <= 3.5)\n      If (feature 5 <= 5.5)\n       Predict: -0.21210307189269242\n      Else (feature 5 > 5.5)\n       Predict: -0.09837470705172761\n     Else (feature 8 > 3.5)\n      If (feature 3 <= 31.5)\n       Predict: -0.08753495430102674\n      Else (feature 3 > 31.5)\n       Predict: 0.18244270334369653\n  Tree 5:\n    If (feature 4 <= 2.5)\n     If (feature 3 <= 1.5)\n      If (feature 2 <= 1.5)\n       Predict: -0.2477602117062285\n      Else (feature 2 > 1.5)\n       Predict: -0.2762544253530495\n     Else (feature 3 > 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.2507861227519274\n      Else (feature 0 > 3.5)\n       Predict: -0.21333958920091542\n    Else (feature 4 > 2.5)\n     If (feature 4 <= 5.5)\n      If (feature 8 <= 1.5)\n       Predict: -0.21358819252164613\n      Else (feature 8 > 1.5)\n       Predict: -0.16204981793566964\n     Else (feature 4 > 5.5)\n      If (feature 8 <= 2.5)\n       Predict: -0.13470641139681627\n      Else (feature 8 > 2.5)\n       Predict: -0.002358420057629618\n  Tree 6:\n    If (feature 3 <= 5.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.25239804561688667\n      Else (feature 0 > 3.5)\n       Predict: -0.22687144507288687\n     Else (feature 8 > 1.5)\n      If (feature 1 <= 0.5)\n       Predict: -0.174221209109144\n      Else (feature 1 > 0.5)\n       Predict: -0.22107713982982788\n    Else (feature 3 > 5.5)\n     If (feature 4 <= 5.5)\n      If (feature 5 <= 2.5)\n       Predict: -0.19964873441465217\n      Else (feature 5 > 2.5)\n       Predict: -0.14368419131975654\n     Else (feature 4 > 5.5)\n      If 
(feature 5 <= 12.5)\n       Predict: -0.09255155931126256\n      Else (feature 5 > 12.5)\n       Predict: 0.1962148403733965\n  Tree 7:\n    If (feature 4 <= 2.5)\n     If (feature 8 <= 1.5)\n      If (feature 3 <= 1.5)\n       Predict: -0.233660962914513\n      Else (feature 3 > 1.5)\n       Predict: -0.20906106066522057\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 10.5)\n       Predict: -0.1753140413532053\n      Else (feature 6 > 10.5)\n       Predict: 0.22663775980774656\n    Else (feature 4 > 2.5)\n     If (feature 4 <= 10.5)\n      If (feature 8 <= 1.5)\n       Predict: -0.17359382147125516\n      Else (feature 8 > 1.5)\n       Predict: -0.10545112508272057\n     Else (feature 4 > 10.5)\n      If (feature 5 <= 24.5)\n       Predict: 0.03171248513307941\n      Else (feature 5 > 24.5)\n       Predict: 0.70747468944607\n  Tree 8:\n    If (feature 3 <= 5.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.21774089584854125\n      Else (feature 0 > 3.5)\n       Predict: -0.19349248439509295\n     Else (feature 8 > 1.5)\n      If (feature 8 <= 3.5)\n       Predict: -0.164285373397181\n      Else (feature 8 > 3.5)\n       Predict: -0.08488834589073323\n    Else (feature 3 > 5.5)\n     If (feature 8 <= 2.5)\n      If (feature 5 <= 4.5)\n       Predict: -0.15966948922502291\n      Else (feature 5 > 4.5)\n       Predict: -0.08476361702823224\n     Else (feature 8 > 2.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.1662239874137037\n      Else (feature 5 > 1.5)\n       Predict: -0.003301650968574977\n  Tree 9:\n    If (feature 4 <= 2.5)\n     If (feature 8 <= 1.5)\n      If (feature 3 <= 1.5)\n       Predict: -0.20282990102853965\n      Else (feature 3 > 1.5)\n       Predict: -0.17921725632774865\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 2.5)\n       Predict: -0.16099836231905457\n      Else (feature 6 > 2.5)\n       Predict: -0.12023948597374984\n    Else (feature 4 > 2.5)\n     If (feature 3 <= 58.5)\n      If 
(feature 8 <= 3.5)\n       Predict: -0.12680887063580656\n      Else (feature 8 > 3.5)\n       Predict: -0.022696446598858423\n     Else (feature 3 > 58.5)\n      If (feature 5 <= 25.5)\n       Predict: 0.15289715044804408\n      Else (feature 5 > 25.5)\n       Predict: 0.7944849239113788\n  Tree 10:\n    If (feature 3 <= 3.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.1913240456013554\n      Else (feature 0 > 3.5)\n       Predict: -0.17119151062720972\n     Else (feature 8 > 1.5)\n      If (feature 1 <= 0.5)\n       Predict: -0.12787058963150896\n      Else (feature 1 > 0.5)\n       Predict: -0.18587180279692916\n    Else (feature 3 > 3.5)\n     If (feature 4 <= 5.5)\n      If (feature 5 <= 2.5)\n       Predict: -0.1479551509846623\n      Else (feature 5 > 2.5)\n       Predict: -0.1031325703517493\n     Else (feature 4 > 5.5)\n      If (feature 8 <= 1.5)\n       Predict: -0.102689869820952\n      Else (feature 8 > 1.5)\n       Predict: 0.001303330052426546\n  Tree 11:\n    If (feature 3 <= 3.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.17902922532076776\n      Else (feature 0 > 3.5)\n       Predict: -0.15988742379385779\n     Else (feature 8 > 1.5)\n      If (feature 9 <= 5.5)\n       Predict: -0.13453258305868496\n      Else (feature 9 > 5.5)\n       Predict: 3.5700370687409304\n    Else (feature 3 > 3.5)\n     If (feature 8 <= 2.5)\n      If (feature 5 <= 4.5)\n       Predict: -0.13273684812724665\n      Else (feature 5 > 4.5)\n       Predict: -0.06389460666672396\n     Else (feature 8 > 2.5)\n      If (feature 6 <= 10.5)\n       Predict: -0.052321749635660664\n      Else (feature 6 > 10.5)\n       Predict: 0.2138413200041222\n  Tree 12:\n    If (feature 4 <= 2.5)\n     If (feature 8 <= 1.5)\n      If (feature 3 <= 1.5)\n       Predict: -0.16695353740388175\n      Else (feature 3 > 1.5)\n       Predict: -0.14521462052650033\n     Else (feature 8 > 1.5)\n      If (feature 0 <= 3.5)\n       
Predict: -0.13044285195452432\n      Else (feature 0 > 3.5)\n       Predict: -0.09403525962378233\n    Else (feature 4 > 2.5)\n     If (feature 5 <= 3.5)\n      If (feature 6 <= 3.5)\n       Predict: -0.11737127146640841\n      Else (feature 6 > 3.5)\n       Predict: -0.05299568577506127\n     Else (feature 5 > 3.5)\n      If (feature 8 <= 1.5)\n       Predict: -0.07664205009178644\n      Else (feature 8 > 1.5)\n       Predict: 0.03899431094425055\n  Tree 13:\n    If (feature 3 <= 5.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.15750145875609003\n      Else (feature 0 > 3.5)\n       Predict: -0.13572220318880082\n     Else (feature 8 > 1.5)\n      If (feature 8 <= 3.5)\n       Predict: -0.10952379010621043\n      Else (feature 8 > 3.5)\n       Predict: -0.041188379855881065\n    Else (feature 3 > 5.5)\n     If (feature 4 <= 7.5)\n      If (feature 5 <= 2.5)\n       Predict: -0.1106699081101201\n      Else (feature 5 > 2.5)\n       Predict: -0.06446461813880315\n     Else (feature 4 > 7.5)\n      If (feature 3 <= 53.5)\n       Predict: -0.025241370116513877\n      Else (feature 3 > 53.5)\n       Predict: 0.1902862592142519\n  Tree 14:\n    If (feature 3 <= 3.5)\n     If (feature 3 <= 1.5)\n      If (feature 2 <= 1.5)\n       Predict: -0.12491344427726951\n      Else (feature 2 > 1.5)\n       Predict: -0.1521994130465644\n     Else (feature 3 > 1.5)\n      If (feature 2 <= 2.5)\n       Predict: -0.03407466538261574\n      Else (feature 2 > 2.5)\n       Predict: -0.12755805400913908\n    Else (feature 3 > 3.5)\n     If (feature 8 <= 1.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.1445615434168171\n      Else (feature 5 > 1.5)\n       Predict: -0.09188170904632038\n     Else (feature 8 > 1.5)\n      If (feature 4 <= 6.5)\n       Predict: -0.0655167183464598\n      Else (feature 4 > 6.5)\n       Predict: 0.03283147726550662\n  Tree 15:\n    If (feature 4 <= 2.5)\n     If (feature 8 <= 1.5)\n      If (feature 5 <= 1.5)\n       
Predict: -0.1388940122842118\n      Else (feature 5 > 1.5)\n       Predict: -0.11841006164268691\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 2.5)\n       Predict: -0.10300716166984528\n      Else (feature 6 > 2.5)\n       Predict: -0.06556944677459628\n    Else (feature 4 > 2.5)\n     If (feature 6 <= 8.5)\n      If (feature 5 <= 12.5)\n       Predict: -0.07230813121022342\n      Else (feature 5 > 12.5)\n       Predict: 0.1885376364478399\n     Else (feature 6 > 8.5)\n      If (feature 8 <= 4.5)\n       Predict: 0.02815630020799189\n      Else (feature 8 > 4.5)\n       Predict: 0.18064798451076802\n  Tree 16:\n    If (feature 8 <= 1.5)\n     If (feature 4 <= 1.5)\n      If (feature 0 <= 2.5)\n       Predict: -0.14097034419210033\n      Else (feature 0 > 2.5)\n       Predict: -0.1203474005331591\n     Else (feature 4 > 1.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.1280737359245635\n      Else (feature 5 > 1.5)\n       Predict: -0.07692067479320272\n    Else (feature 8 > 1.5)\n     If (feature 4 <= 5.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.09100141914873239\n      Else (feature 0 > 3.5)\n       Predict: -0.05226544021481223\n     Else (feature 4 > 5.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.1378281624178676\n      Else (feature 5 > 1.5)\n       Predict: 0.05330920430720916\n  Tree 17:\n    If (feature 3 <= 3.5)\n     If (feature 1 <= 0.5)\n      If (feature 8 <= 1.5)\n       Predict: -0.11357964001619111\n      Else (feature 8 > 1.5)\n       Predict: -0.07007862294082483\n     Else (feature 1 > 0.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.14322417676689989\n      Else (feature 0 > 3.5)\n       Predict: -0.1135539515950429\n    Else (feature 3 > 3.5)\n     If (feature 8 <= 2.5)\n      If (feature 5 <= 5.5)\n       Predict: -0.08482622212063412\n      Else (feature 5 > 5.5)\n       Predict: -0.014199551685788531\n     Else (feature 8 > 2.5)\n      If (feature 5 <= 3.5)\n       Predict: -0.03396760759854778\n      Else 
(feature 5 > 3.5)\n       Predict: 0.07542651295922342\n  Tree 18:\n    If (feature 3 <= 5.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.11938021303965062\n      Else (feature 0 > 3.5)\n       Predict: -0.09865120191999605\n     Else (feature 8 > 1.5)\n      If (feature 1 <= 0.5)\n       Predict: -0.05986525554393727\n      Else (feature 1 > 0.5)\n       Predict: -0.09706173529603614\n    Else (feature 3 > 5.5)\n     If (feature 5 <= 4.5)\n      If (feature 7 <= 0.5)\n       Predict: -0.05976844629053375\n      Else (feature 7 > 0.5)\n       Predict: -0.18288968445569673\n     Else (feature 5 > 4.5)\n      If (feature 8 <= 8.5)\n       Predict: -0.00360119678378293\n      Else (feature 8 > 8.5)\n       Predict: 0.7977547554855201\n  Tree 19:\n    If (feature 3 <= 3.5)\n     If (feature 3 <= 1.5)\n      If (feature 2 <= 1.5)\n       Predict: -0.09045062843255423\n      Else (feature 2 > 1.5)\n       Predict: -0.1168940919138825\n     Else (feature 3 > 1.5)\n      If (feature 2 <= 2.5)\n       Predict: -0.007373326797246026\n      Else (feature 2 > 2.5)\n       Predict: -0.09338415050866863\n    Else (feature 3 > 3.5)\n     If (feature 8 <= 1.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.10874471655639707\n      Else (feature 5 > 1.5)\n       Predict: -0.06306478917679632\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 10.5)\n       Predict: -0.03276849122249847\n      Else (feature 6 > 10.5)\n       Predict: 0.14461394889857312\n  Tree 20:\n    If (feature 4 <= 2.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.10659500030276792\n      Else (feature 0 > 3.5)\n       Predict: -0.08718703861561566\n     Else (feature 8 > 1.5)\n      If (feature 6 <= 2.5)\n       Predict: -0.07500200225448439\n      Else (feature 6 > 2.5)\n       Predict: -0.04058614820275553\n    Else (feature 4 > 2.5)\n     If (feature 5 <= 1.5)\n      If (feature 1 <= 0.5)\n       Predict: -0.062348739076436746\n      Else 
(feature 1 > 0.5)\n       Predict: -0.12105721982744468\n     Else (feature 5 > 1.5)\n      If (feature 8 <= 4.5)\n       Predict: -0.033788297708013254\n      Else (feature 8 > 4.5)\n       Predict: 0.1269632690303273\n  Tree 21:\n    If (feature 8 <= 1.5)\n     If (feature 5 <= 2.5)\n      If (feature 1 <= 0.5)\n       Predict: -0.08977333519692089\n      Else (feature 1 > 0.5)\n       Predict: -0.10862948745915514\n     Else (feature 5 > 2.5)\n      If (feature 6 <= 1.5)\n       Predict: -0.03661725370776406\n      Else (feature 6 > 1.5)\n       Predict: -0.08425575168766186\n    Else (feature 8 > 1.5)\n     If (feature 5 <= 2.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.07303626014374924\n      Else (feature 0 > 3.5)\n       Predict: -0.0320627225624413\n     Else (feature 5 > 2.5)\n      If (feature 8 <= 4.5)\n       Predict: -0.010150864444864106\n      Else (feature 8 > 4.5)\n       Predict: 0.14536746053636623\n  Tree 22:\n    If (feature 3 <= 3.5)\n     If (feature 3 <= 1.5)\n      If (feature 2 <= 1.5)\n       Predict: -0.0758271959768998\n      Else (feature 2 > 1.5)\n       Predict: -0.10119812014439408\n     Else (feature 3 > 1.5)\n      If (feature 2 <= 2.5)\n       Predict: 0.002474876639436415\n      Else (feature 2 > 2.5)\n       Predict: -0.07863277300157498\n    Else (feature 3 > 3.5)\n     If (feature 4 <= 5.5)\n      If (feature 2 <= 6.5)\n       Predict: 0.008381853824189451\n      Else (feature 2 > 6.5)\n       Predict: -0.05879524944423387\n     Else (feature 4 > 5.5)\n      If (feature 5 <= 26.0)\n       Predict: 5.279531725083092E-4\n      Else (feature 5 > 26.0)\n       Predict: 0.646559477518426\n  Tree 23:\n    If (feature 8 <= 1.5)\n     If (feature 4 <= 1.5)\n      If (feature 0 <= 2.5)\n       Predict: -0.10078387703026483\n      Else (feature 0 > 2.5)\n       Predict: -0.08180840499841012\n     Else (feature 4 > 1.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.08829713306952275\n      Else (feature 5 > 1.5)\n       
Predict: -0.04426634946229776\n    Else (feature 8 > 1.5)\n     If (feature 5 <= 5.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.05564802983846768\n      Else (feature 5 > 1.5)\n       Predict: -0.020350566680333644\n     Else (feature 5 > 5.5)\n      If (feature 4 <= 9.5)\n       Predict: 0.01849148408593258\n      Else (feature 4 > 9.5)\n       Predict: 0.20319303501959063\n  Tree 24:\n    If (feature 3 <= 3.5)\n     If (feature 1 <= 0.5)\n      If (feature 8 <= 1.5)\n       Predict: -0.07769995539580873\n      Else (feature 8 > 1.5)\n       Predict: -0.03887659154224262\n     Else (feature 1 > 0.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.10457771258442372\n      Else (feature 0 > 3.5)\n       Predict: -0.07755810865031441\n    Else (feature 3 > 3.5)\n     If (feature 8 <= 2.5)\n      If (feature 5 <= 5.5)\n       Predict: -0.05405679584049002\n      Else (feature 5 > 5.5)\n       Predict: 0.004674209320649082\n     Else (feature 8 > 2.5)\n      If (feature 2 <= 6.5)\n       Predict: 0.17981920338283322\n      Else (feature 2 > 6.5)\n       Predict: -0.0034866309814327143\n  Tree 25:\n    If (feature 3 <= 5.5)\n     If (feature 8 <= 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.08520190497328708\n      Else (feature 0 > 3.5)\n       Predict: -0.0659885164019917\n     Else (feature 8 > 1.5)\n      If (feature 8 <= 3.5)\n       Predict: -0.0474167519574663\n      Else (feature 8 > 3.5)\n       Predict: 0.011573683408063421\n    Else (feature 3 > 5.5)\n     If (feature 5 <= 4.5)\n      If (feature 7 <= 0.5)\n       Predict: -0.03406980590347756\n      Else (feature 7 > 0.5)\n       Predict: -0.1477360453033194\n     Else (feature 5 > 4.5)\n      If (feature 8 <= 8.5)\n       Predict: 0.009971118781445763\n      Else (feature 8 > 8.5)\n       Predict: 0.6165002353920106\n  Tree 26:\n    If (feature 4 <= 2.5)\n     If (feature 3 <= 1.5)\n      If (feature 2 <= 1.5)\n       Predict: -0.06004518529344669\n      Else (feature 2 > 1.5)\n       
Predict: -0.0844774248917181\n     Else (feature 3 > 1.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.06811827530855709\n      Else (feature 0 > 3.5)\n       Predict: -0.04001471485105617\n    Else (feature 4 > 2.5)\n     If (feature 0 <= 4.5)\n      If (feature 8 <= 3.5)\n       Predict: -0.04142993827153367\n      Else (feature 8 > 3.5)\n       Predict: 0.02207550139458356\n     Else (feature 0 > 4.5)\n      If (feature 4 <= 15.5)\n       Predict: 0.01672572378427278\n      Else (feature 4 > 15.5)\n       Predict: 0.6952515060221467\n  Tree 27:\n    If (feature 8 <= 1.5)\n     If (feature 5 <= 2.5)\n      If (feature 1 <= 0.5)\n       Predict: -0.06604810491727528\n      Else (feature 1 > 0.5)\n       Predict: -0.08332976789718155\n     Else (feature 5 > 2.5)\n      If (feature 6 <= 1.5)\n       Predict: -0.01770773639179527\n      Else (feature 6 > 1.5)\n       Predict: -0.06299840451564359\n    Else (feature 8 > 1.5)\n     If (feature 5 <= 2.5)\n      If (feature 0 <= 3.5)\n       Predict: -0.05139058644631643\n      Else (feature 0 > 3.5)\n       Predict: -0.014959047028438423\n     Else (feature 5 > 2.5)\n      If (feature 8 <= 4.5)\n       Predict: 3.4994010732254964E-4\n      Else (feature 8 > 4.5)\n       Predict: 0.12689764130443412\n  Tree 28:\n    If (feature 3 <= 3.5)\n     If (feature 3 <= 1.5)\n      If (feature 2 <= 1.5)\n       Predict: -0.05389820117981357\n      Else (feature 2 > 1.5)\n       Predict: -0.07740069318969107\n     Else (feature 3 > 1.5)\n      If (feature 2 <= 2.5)\n       Predict: 0.01927918727955098\n      Else (feature 2 > 2.5)\n       Predict: -0.0570096853266175\n    Else (feature 3 > 3.5)\n     If (feature 4 <= 5.5)\n      If (feature 2 <= 6.5)\n       Predict: 0.021582163564201565\n      Else (feature 2 > 6.5)\n       Predict: -0.04084081390812196\n     Else (feature 4 > 5.5)\n      If (feature 7 <= 0.5)\n       Predict: 0.01583105996507969\n      Else (feature 7 > 0.5)\n       Predict: -0.18847001181018513\n  Tree 29:\n 
   If (feature 8 <= 1.5)\n     If (feature 4 <= 1.5)\n      If (feature 0 <= 2.5)\n       Predict: -0.0784250625640731\n      Else (feature 0 > 2.5)\n       Predict: -0.06067730057837976\n     Else (feature 4 > 1.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.06780293603288737\n      Else (feature 5 > 1.5)\n       Predict: -0.027418005056003423\n    Else (feature 8 > 1.5)\n     If (feature 9 <= 15.5)\n      If (feature 5 <= 1.5)\n       Predict: -0.03801644218198048\n      Else (feature 5 > 1.5)\n       Predict: -0.0033567421370810694\n     Else (feature 9 > 15.5)\n      If (feature 4 <= 8.5)\n       Predict: -0.028978266436712925\n      Else (feature 4 > 8.5)\n       Predict: 0.5202501906306787\n\n"
     ]
    }
   ],
   "source": [
    "# Print a header line followed by the model's textual description.\n",
    "header = 'Learned classification GBT model:'\n",
    "print(header)\n",
    "gbt_description = model.toDebugString()\n",
    "print(gbt_description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model to HDFS; the commented line shows how to read it back.\n",
    "# NOTE(review): re-running may fail if this path already exists -- confirm.\n",
    "gbt_model_path = \"hdfs://node1:9000/user/root/exp4/models/myGradientBoostingClassificationModel\"\n",
    "model.save(sc, gbt_model_path)\n",
    "# sameModel = GradientBoostedTreesModel.load(sc, gbt_model_path)"
   ]
  },
  {
   "source": [
    "# Don't forget to stop the SparkContext!"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stop the SparkContext `sc` that the earlier cells used for loading, training and saving.\n",
    "sc.stop()"
   ]
  }
 ]
}